clear all
*cap log close
set more off
set seed 603

// other stuff -------------
local nbin = 16
local Q = 2
// -----------------------------------------------------



// --------------------------------------------------------------------------------
// covariates extrapolation plot 

// initial step: data setup ---------------------------
use "${data}/out/4-main", clear 
gen Y = cite_ny1
gen D = harsh
gen W = covbin1
keep Y D Z W totfe troop countynum  
egen G = group(W)

forval q = 1/`Q' {
	gen z_`q' = Z^`q'
}

tempfile data 
save    `data'

* get group weights ----------
gen N = 1 
collapse (sum) N, by(G)
egen Ntot = sum(N)
gen wt = N/Ntot 
keep G wt 
tempfile wt 
save    `wt'


// first part: construct polynomial fits --------------

* initialize storage grid --------
clear 
set obs 101
gen index = _n 
gen zgrid = (_n-1)/100
tempfile grid 
save    `grid'

use `data', clear 
qui summ G 
forval g = 1/`r(max)' {

	use `data', clear
	reghdfe Y z_* if G==`g', absorb(totfe)
	local b0  = _b[_cons]
	forval q = 1/`Q' {
		local b`q' = _b[z_`q']
	}

	use `grid', clear 
	gen G = `g'
	gen fit = `b0'
	forval q = 1/`Q' {
		replace fit = fit + (`b`q''*(zgrid^`q'))
	}

	if `g'==1 {
		tempfile grid_out 
		save    `grid_out'
	}
	else {
		qui append using `grid_out'
		tempfile grid_out
		save    `grid_out'
	}

}

* aggregate up the group-level estimates ---------
use `grid_out', clear 
merge m:1 G using `wt', nogen 
collapse (mean) fit [aw=wt], by(index zgrid)
rename fit fit_sep 
tempfile grid_out 
save    `grid_out'

* add simple fit conditional on controls to grid -----
use `data', clear 
reghdfe Y z_*, absorb(totfe W)
local b0 = _b[_cons]
forval q = 1/`Q' {
	local b`q' = _b[z_`q']
}

use `grid_out', clear 
gen fit_simp = `b0'
forval q = 1/`Q' {
	replace fit_simp = fit_simp + (`b`q''*(zgrid^`q'))
}

tempfile grid_out 
save    `grid_out'


// second part: grouped binsregs ------------------------------

* binsreg by group ---------------------
use `data', clear 
sort Z

binsreg Y Z, absorb(totfe G) binspos(es) nbins(`nbin') savedata(bs1) replace 
binsreg Y Z, absorb(totfe) binspos(es) nbins(`nbin') by(G) samebinsby savedata(bs2) replace 

use bs2, clear 
merge m:1 G using `wt', nogen 
collapse (mean) ybs2 = dots_fit zbs2 = dots_x [aw=wt], by(dots_binid)

merge 1:1 dots_binid using bs1, nogen 
rename dots_binid index 
rename dots_x zbs1
rename dots_fit ybs1 
keep  index zbs1 ybs1 zbs2 ybs2 
order index zbs1 ybs1 zbs2 ybs2 

* attach to data for plot ------------------
tempfile binscatter
save    `binscatter'

use `grid_out', clear 
merge 1:1 index using `binscatter', nogen 
tempfile grid_out 
save    `grid_out'
rm bs1.dta 
rm bs2.dta 


// superimpose grouped plot over baseline plot ------------------------
import delimited using "${est}/recid_poly.csv", clear 

* store quadratic estimates ---------
keep if poly == `Q'
replace par = trim(upper(par))
keep if par == "Y0"|par == "Y1"
gen z = (par=="Y1")
gen flag = 1 
gen y    = est
keep z y ub lb flag 
tempfile est 
save    `est'

* store quadratic fit 
use `data', clear
reghdfe Y z_*, absorb(totfe)
local b0 = _b[_cons]
forval q = 1/`Q' {
	local b`q' = _b[z_`q']
}

* store binscatter ------------------
use "${data}/out/4-main", clear 
gen Y = cite_ny1 
keep Y Z totfe 
binsreg Y Z, absorb(totfe) binspos(es) nbins(`nbin') savedata(bs) replace 

use bs, clear 
keep dots_fit dots_x 
rename dots_x z 
rename dots_fit y
qui append using `est'
rm bs.dta 

gen fit_ovr = `b0'
forval q = 1/`Q' {
	replace fit_ovr = fit_ovr + `b`q''*(z^`q')
}

// compile all data for plot --------------------
sort z 
gen index = _n 
merge 1:1 index using `grid_out', nogen 


// build plot --------------------
sort index
#delimit ;
scatter y z, msymbol(O) mcolor(gs12) msize(medsmall) || 
line fit_ovr z, lcolor(dkgreen) ||
scatter y z if flag==1, mcolor(dkgreen) msize(med) ||
rspike ub lb z if flag==1, lcolor(dkgreen) ||
scatter ybs1 zbs1, msymbol(Oh) mcolor(maroon) ||
line fit_simp zgrid, lcolor(maroon) lpattern(dash) ||
scatter ybs2 zbs2, msymbol(Dh) mcolor(purple) msize(medsmall) ||
line fit_sep zgrid, lcolor(purple) lpattern(longdash)
graphregion(color(white)) plotregion(lcolor(black) lwidth(medthin)) 
ylab(0.33(0.01)0.38,nogrid) xlab(,nogrid)
xtitle("Officer Stringency") ytitle("Pr(Reoffend)")
legend(ring(0) pos(2) cols(1) size(small) order(5 6 7 8) region(lstyle(border))
lab(5 "Add Cov: Binscatter") 
lab(6 "Add Controls: Quadratic Fit")
lab(7 "Separate by Cov: Binscatter")
lab(8 "Separate by Cov: Quadratic Fit")) ;
#delimit cr
graph export "${out}/apx_extrap/groups_cov.pdf", replace
// --------------------------------------------------------------------------------





// --------------------------------------------------------------------------------
// locations extrapolation plot ----------------

// initial step: data setup ---------------------------
use "${data}/out/4-main", clear 
gen Y = cite_ny1
gen D = harsh
gen W = covbin1
keep Y D Z W totfe troop countynum  
egen G = group(troop)

forval q = 1/`Q' {
	gen z_`q' = Z^`q'
}

tempfile data 
save    `data'

* get group weights ----------
gen N = 1 
collapse (sum) N, by(G)
egen Ntot = sum(N)
gen wt = N/Ntot 
keep G wt 
tempfile wt 
save    `wt'


// first part: construct polynomial fits --------------

* initialize storage grid --------
clear 
set obs 101
gen index = _n 
gen zgrid = (_n-1)/100
tempfile grid 
save    `grid'

use `data', clear 
qui summ G 
forval g = 1/`r(max)' {

	use `data', clear
	reghdfe Y z_* if G==`g', absorb(totfe)
	local b0  = _b[_cons]
	forval q = 1/`Q' {
		local b`q' = _b[z_`q']
	}

	use `grid', clear 
	gen G = `g'
	gen fit = `b0'
	forval q = 1/`Q' {
		replace fit = fit + (`b`q''*(zgrid^`q'))
	}

	if `g'==1 {
		tempfile grid_out 
		save    `grid_out'
	}
	else {
		qui append using `grid_out'
		tempfile grid_out
		save    `grid_out'
	}

}

* aggregate up the group-level estimates ---------
use `grid_out', clear 
merge m:1 G using `wt', nogen 
collapse (mean) fit [aw=wt], by(index zgrid)
rename fit fit_sep 
tempfile grid_out 
save    `grid_out'


* add on SPC estimates (FM 2022) ------------
import delimited using "${temp}/spc_tilde.csv", clear 
forval q = 1/`Q' {
	gen z_`q' = dtil^`q'
}

reghdfe ytil z_* [aw=nwt], absorb(loc)
local b0  = _b[_cons]
forval q = 1/`Q' {
	local b`q' = _b[z_`q']
}

use `grid_out', clear 
gen fit_spc = `b0'
forval q = 1/`Q' {
	replace fit_spc = fit_spc + `b`q''*(zgrid^`q')
} 

tempfile grid_out
save    `grid_out'


// next part: binsreg by groups --------------------

* binscatter by troops --------
use `data', clear 
sort Z

binsreg Y Z, absorb(totfe) binspos(es) nbins(`nbin') by(G) samebinsby savedata(bs) replace
use bs, clear
merge m:1 G using `wt', nogen
collapse (mean) dots_fit dots_x [aw=wt], by(dots_binid)
ren dots_fit   ybsg
ren dots_x     zbsg
ren dots_binid index 

keep  index zbsg ybsg 
order index zbsg ybsg  
tempfile bs1
save    `bs1'
rm bs.dta

* binscatter for SPC approach (FM 2022) -------
import delimited using "${temp}/spc_tilde.csv", clear 
binsreg ytil dtil [aw=nwt], absorb(loc) binspos(es) nbins(`nbin') savedata(bs) replace
use bs, clear
ren dots_fit   yfm1 
ren dots_x     zfm1 
ren dots_binid index

keep  index zfm1 yfm1 
order index zfm1 yfm1 
tempfile bs2
save    `bs2'
rm bs.dta

* binscatter for SPC quantiles approach ---------
import delimited using "${temp}/spc_tilde.csv", clear 
gen nstops = nwt

local nq=10
egen q = xtile(dtil), nq(`nq') by(loc) weight(nstops)
egen nstops_loc = sum(nstops), by(loc)
egen nstops_loc_q = sum(nstops), by(loc q)
gen weight = (nstops * nstops_loc)/nstops_loc_q

egen dq = wtmean(dtil), by(q) weight(weight)
egen yq = wtmean(ytil), by(q) weight(weight)
bysort q: gen keepq=(_n==1)
keep if keepq == 1

gen zfm2 = dq 
gen yfm2 = yq 
sort dq 
gen index = _n 

keep  index zfm2 yfm2
order index zfm2 yfm2
tempfile bs3
save    `bs3'

* add all the binscatters to the grid ------------------
use `grid_out', clear 
merge 1:1 index using `bs1', nogen 
merge 1:1 index using `bs2', nogen 
merge 1:1 index using `bs3', nogen 
tempfile grid_out 
save    `grid_out'


// superimpose grouped plot over baseline plot ------------------------
import delimited using "${est}/recid_poly.csv", clear 

* store quadratic estimates ---------
keep if poly == `Q'
replace par = trim(upper(par))
keep if par == "Y0"|par == "Y1"
gen z = (par=="Y1")
gen flag = 1 
gen y    = est
keep z y ub lb flag 
tempfile est 
save    `est'

* store quadratic fit 
use `data', clear
reghdfe Y z_*, absorb(totfe)
local b0 = _b[_cons]
forval q = 1/`Q' {
	local b`q' = _b[z_`q']
}

* store binscatter ------------------
use "${data}/out/4-main", clear 
gen Y = cite_ny1 
keep Y Z totfe 
binsreg Y Z, absorb(totfe) binspos(es) nbins(`nbin') savedata(bs) replace 

use bs, clear 
keep dots_fit dots_x 
rename dots_x z 
rename dots_fit y
qui append using `est'
rm bs.dta 

gen fit_ovr = `b0'
forval q = 1/`Q' {
	replace fit_ovr = fit_ovr + `b`q''*(z^`q')
}

// compile all data for plot --------------------
sort z 
gen index = _n 
merge 1:1 index using `grid_out', nogen 


// build plot --------------------
sort index 
#delimit ;
scatter y z, msymbol(O) mcolor(gs12) msize(medsmall) || 
line fit_ovr z, lcolor(dkgreen) ||
scatter y z if flag==1, mcolor(dkgreen) msize(med) ||
rspike ub lb z if flag==1, lcolor(dkgreen) ||
scatter ybsg zbsg, msymbol(Oh) mcolor(maroon) ||
line fit_sep zgrid, lpattern(dash) lcolor(maroon) ||
scatter yfm1 zfm1, msymbol(Dh) mcolor(purple) msize(medsmall) ||
line fit_spc zgrid, lpattern(longdash) lcolor(purple) ||
scatter yfm2 zfm2, msymbol(Sh) mcolor(dkorange)
graphregion(color(white)) plotregion(lcolor(black) lwidth(medthin)) 
ylab(0.33(0.01)0.40,nogrid) xlab(,nogrid)
xtitle("Officer Stringency") ytitle("Pr(Reoffend)")
legend(ring(0) pos(2) cols(2) size(small) order(5 6 7 8 9) region(lstyle(border))
lab(5 "Troops: Binscatter") 
lab(6 "Troops: Quadratic Fit")
lab(7 "FM: Binscatter")
lab(8 "FM: Quadratic Fit")
lab(9 "FM: Quantiles"));
#delimit cr
graph export "${out}/apx_extrap/groups_loc.pdf", replace
// --------------------------------------------------------------------------------




// --------------------------------------------------------------------------------
// levels and gains plot within groups ----------------

* set files we need to read in ----------
local Q     = 2
local file1 = "recid_poly"
local file2 = "recid_cov"
local file3 = "recid_troop"
local file4 = "recid_spc"

* compile output from these files ---------
forval j = 1/4 {
	
	import delimited "${est}/`file`j''.csv", clear
	replace par = trim(upper(par))
	keep if poly == `Q'

	* populate levels and gains ----------------
	do "${code}/utility/create_levels_gains.do"

	drop if mi(y0)
	keep ind y0 y0u y0l y1 y1u y1l delta deltau deltal
	gen G = `j'

	replace ind = 0.2 if _n == 1
	replace ind = 0.5  if _n == 2 
	replace ind = 0.8 if _n == 3

	tempfile temp`j'
	save    `temp`j''
	
}

use `temp1', clear
qui append using `temp2'
qui append using `temp3'
qui append using `temp4'

* adjust axes for plot --------------
replace ind = ind-0.04 if G==1
replace ind = ind+0.04 if G==3 
replace ind = ind+0.08 if G==4 

* build plot -----------------------
#delimit ; 
twoway 
(rbar y1u y1l ind if G==1, color(navy%20) barwidth(0.02))
(scatter y0 ind if G==1, msize(large) msymbol(Oh) mcolor(navy))
(pcarrow y0 ind y1 ind if G==1, lcolor(navy) lwidth(medthick) mcolor(navy) barbsize(medlarge))
(rbar y1u y1l ind if G==2, color(dkgreen%20) barwidth(0.02))
(scatter y0 ind if G==2, msize(large) msymbol(Sh) mcolor(dkgreen))
(pcarrow y0 ind y1 ind if G==2, lcolor(dkgreen) lwidth(medthick) mcolor(dkgreen) barbsize(medlarge))
(rbar y1u y1l ind if G==3, color(pruple%20) barwidth(0.02))
(scatter y0 ind if G==3, msize(large) msymbol(Dh) mcolor(purple))
(pcarrow y0 ind y1 ind if G==3, lcolor(purple) lwidth(medthick) mcolor(purple) barbsize(medlarge))
(rbar y1u y1l ind if G==4, color(sienna%20) barwidth(0.02))
(scatter y0 ind if G==4, msize(large) msymbol(Th) mcolor(sienna))
(pcarrow y0 ind y1 ind if G==4, lcolor(sienna) lwidth(medthick) mcolor(sienna) barbsize(medlarge)),
xscale(r(0 1)) xlab(,nogrid)
xlabel(0.2 "All Motorists" 0.5 "Treated" 0.8 "Untreated") ylab(,nogrid) 
legend(ring(0) pos(8) order(2 5 8 11) cols(1) region(lstyle(border))
label(2 "Baseline") label(5 "Within Covariates")
label(8 "Within Troops") label(11 "Within Locations")) 
xtitle("") ytitle("Pr(Reoffend)") 
graphregion(color(white)) bgcolor(white) plotregion(lcolor(black) lwidth(medthin)) ;
#delimit cr
graph export "${out}/apx_extrap/groups_est.pdf", replace
// --------------------------------------------------------------------------------



